package cn.wlh.learn.webmagic.processor;

import cn.wlh.learn.webmagic.pipeline.PicPipeline;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.pipeline.ConsolePipeline;
import us.codecraft.webmagic.processor.PageProcessor;
import us.codecraft.webmagic.selector.Selectable;

/**
 * Crawler for the mmonly.cc picture gallery, used to exercise image downloading.
 *
 * <p>Seed URL: http://www.mmonly.cc/ktmh/hzw/list_34_1.html
 *
 * <p>Two kinds of pages are handled:
 * <ul>
 *   <li><b>List pages</b> (URL starts with {@code .../ktmh/hzw/list_}): every detail-page link
 *       and the "next list page" link are queued for crawling.</li>
 *   <li><b>Detail pages</b> (URL matches {@code .../ktmh/hzw/<digits>}): the "next picture" link
 *       is queued and the URL of the main image is stored in the {@code img} result field.</li>
 * </ul>
 */
public class WeiYiTKProcessor implements PageProcessor {

    /** URL prefix shared by all list (index) pages. */
    private static final String LIST_URL_PREFIX = "http://www.mmonly.cc/ktmh/hzw/list_";

    /** Regex identifying a detail page URL. */
    private static final String DETAIL_URL_REGEX = "http://www.mmonly.cc/ktmh/hzw/[\\d]+";

    /** The site uses {@code href="##"} as a "no next page" placeholder. */
    private static final String PLACEHOLDER_LINK_SUFFIX = "##";

    /** Directory handed to {@link PicPipeline} when no command-line argument is given. */
    private static final String DEFAULT_OUTPUT_DIR = "E:\\spider\\";

    private final Site site = Site.me().setRetryTimes(3).setSleepTime(1000).setTimeOut(10000);

    /**
     * Dispatches the page to the list-page or detail-page handler based on its URL.
     * Pages matching neither pattern are left untouched.
     *
     * @param page the downloaded page
     */
    @Override
    public void process(Page page) {
        if (page.getUrl().toString().startsWith(LIST_URL_PREFIX)) {
            processListPage(page);
        } else if (page.getUrl().regex(DETAIL_URL_REGEX).match()) {
            // regex() always returns a Selectable, so a null check cannot detect "no match";
            // match() reports whether the pattern actually matched.
            processDetailPage(page);
        }
    }

    /** Queues all detail pages linked from a list page, plus the next list page. */
    private void processListPage(Page page) {
        // Links to the detail pages.
        page.addTargetRequests(page.getHtml().$("div.item_t > div > div.ABox > a").links().all());
        // Next list page: the second-to-last <a> inside the pager.
        page.addTargetRequest(page.getHtml().$("#pageNum > a:nth-last-child(2)").links().toString());
    }

    /** Queues the next picture page and records the main image URL in the {@code img} field. */
    private void processDetailPage(Page page) {
        // Next picture page. The last page of a set links to the "##" placeholder; the link may
        // already have been resolved against the page URL, so test the suffix rather than the
        // whole string.
        String nextLink = page.getHtml().$("#nl > a").links().toString();
        if (nextLink != null && !nextLink.endsWith(PLACEHOLDER_LINK_SUFFIX)) {
            page.addTargetRequest(nextLink);
        }

        // Main image: read the src attribute directly instead of slicing the serialized tag,
        // which only worked when src happened to be the last attribute.
        String img = page.getHtml().$("#big-pic p img", "src").toString();
        if (img == null || img.isEmpty()) {
            // Some pages wrap the image in a link instead of a paragraph.
            img = page.getHtml().$("#big-pic a img", "src").toString();
        }
        if (img == null || img.isEmpty()) {
            // Nothing to download on this page; keep it out of the pipelines.
            page.setSkip(true);
            return;
        }
        page.putField("img", img);
    }

    /**
     * Returns the crawl configuration built in the field initializer.
     *
     * @return the site configuration
     */
    @Override
    public Site getSite() {
        return site;
    }

    /**
     * Starts the crawl from the first list page with 5 threads.
     *
     * @param args optional; {@code args[0]} overrides the directory passed to
     *             {@link PicPipeline} (defaults to {@code E:\spider\})
     */
    public static void main(String[] args) {
        String outputDir = args.length > 0 ? args[0] : DEFAULT_OUTPUT_DIR;
        Spider.create(new WeiYiTKProcessor())
                .addUrl("http://www.mmonly.cc/ktmh/hzw/list_34_1.html")
                .addPipeline(new PicPipeline(outputDir))
                .addPipeline(new ConsolePipeline())
                .thread(5)
                .run();
    }
}